Chapter 4 Community composition
Rows: 190 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): microsample
dbl (1): quality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
4.1 Taxonomy barplot
4.1.1 Positive samples, coverage-filtered
#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
mutate(section=unlist(section)) %>%
filter(!is.na(count)) %>%
filter(count > 0) %>%
filter(section != "Ileum") %>%
filter(type == "Positive") %>%
filter(quality == 5) %>%
ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors[-4]) +
labs(x = "Relative abundance", y="Microsamples") +
facet_nested(cryosection ~ ., scales="free_y") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(strip.text.y = element_text(angle = 0),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.spacing = unit(0, "lines")) +
labs(fill="Phylum")4.1.2 Positive samples, coverage-unfiltered
#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
mutate(section=unlist(section)) %>%
filter(!is.na(count)) %>%
filter(count > 0) %>%
filter(section != "Ileum") %>%
filter(type == "Positive") %>%
filter(quality == 5) %>%
ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Relative abundance", y="Microsamples") +
facet_nested(cryosection ~ ., scales="free_y") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(strip.text.y = element_text(angle = 0),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.spacing = unit(0, "lines")) +
labs(fill="Phylum")4.1.3 Control samples, coverage-unfiltered
#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
#filter(phylum %in% c("p__Actinomycetota","p__Bacillota","p__Bacillota_A","p__Pseudomonadota","p__Verrucomicrobiota")) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
filter(is.na(Xcoord)) %>%
filter(type %in% c("NegativeMembrane","NegativeCollection","NegativeReaction")) %>%
ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Relative abundance", y="Membrane controls") +
facet_nested(cryosection + type ~ ., scales="free_y") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(strip.text.y = element_text(angle = 0),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.spacing = unit(0, "lines")) +
labs(fill="Phylum")vertical_tree <- force.ultrametric(genome_tree,method="extend") %>%
ggtree(., size = 0.3) + geom_tiplab()***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
Also defined by 'tidytree'
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
select(phylum)Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
select(colors) %>%
pull()Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vertical_tree <- gheatmap(vertical_tree, phylum_colors, offset=-0.3, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic) +
new_scale_fill()Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
sample_selection <- sample_metadata %>%
filter(!is.na(Xcoord)) %>%
left_join(quality, by=join_by(microsample==microsample)) %>%
filter(quality>=5) %>% select(microsample) %>% pull()
genome_counts_selected <- genome_counts_filt %>%
select(all_of(c("genome",sample_selection))) %>% column_to_rownames(var="genome") %>% tss()
vertical_tree <- gheatmap(vertical_tree, genome_counts_selected, offset=-0.2, width=0.5, colnames=FALSE, colnames_angle=90, font.size=3, colnames_position="top", colnames_offset_y = 15) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "#315b7d", na.value="#f4f4f4") +
new_scale_fill()Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
4.2 Genus overview
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
filter(quality>=5) %>%
group_by(microsample,cryosection,phylum,genus) %>%
summarise(relabun=sum(count)) %>%
filter(genus != "g__") %>%
mutate(genus= sub("^g__", "", genus))
genus_summary_sort <- genus_summary %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary_sort %>%
tt()| genus | mean | sd |
|---|---|---|
| Blautia | 1.415165e-01 | 0.1073365208 |
| Eisenbergiella | 1.211367e-01 | 0.0072496627 |
| Mediterraneibacter | 9.882063e-02 | 0.0080567835 |
| Choladocola | 5.382332e-02 | 0.0181704527 |
| Streptococcus | 5.364124e-02 | 0.0446141635 |
| Agathobaculum | 4.394104e-02 | 0.0120987075 |
| Caccovicinus | 4.336548e-02 | 0.0151276583 |
| Pelethomonas | 3.697454e-02 | 0.0344834189 |
| Lactobacillus | 3.319425e-02 | 0.0257383125 |
| Fimenecus | 3.243428e-02 | 0.0302682673 |
| Gallimonas | 3.044755e-02 | 0.0285881065 |
| Acutalibacter | 2.897240e-02 | 0.0035123478 |
| UBA1417 | 2.524574e-02 | 0.0083481457 |
| Negativibacillus | 2.239375e-02 | 0.0076181853 |
| Lawsonibacter | 2.185606e-02 | 0.0060775371 |
| Anaerobutyricum | 1.960099e-02 | 0.0021010163 |
| Enterocloster | 1.480431e-02 | 0.0074888423 |
| Dysosmobacter | 1.304275e-02 | 0.0093588049 |
| Merdivicinus | 1.203935e-02 | 0.0071049659 |
| Lachnoclostridium_A | 1.185888e-02 | 0.0031752503 |
| Intestinimonas | 1.159075e-02 | 0.0071633881 |
| Rubneribacter | 9.925375e-03 | 0.0013520870 |
| Flavonifractor | 9.403895e-03 | 0.0016224845 |
| Clostridium_Q | 8.511605e-03 | 0.0091097365 |
| Thomasclavelia | 8.455363e-03 | 0.0067802297 |
| Anaeromassilibacillus | 8.097524e-03 | 0.0078246041 |
| Escherichia | 7.594034e-03 | 0.0033677408 |
| Copromonas | 7.283530e-03 | 0.0025945191 |
| Faecousia | 7.123719e-03 | 0.0068608648 |
| Ornithomonoglobus | 6.336604e-03 | 0.0057056932 |
| UMGS1370 | 5.551334e-03 | 0.0041535617 |
| Anaerotignum | 5.047360e-03 | 0.0045683428 |
| Enterenecus | 3.936354e-03 | 0.0141899497 |
| Merdisoma | 3.916381e-03 | 0.0036395670 |
| HGM12545 | 3.633129e-03 | 0.0034388628 |
| Borkfalkia | 3.495036e-03 | 0.0017718409 |
| Pullichristensenella | 3.469869e-03 | 0.0032979124 |
| Scatosoma | 3.325432e-03 | 0.0031174044 |
| Scatomorpha | 3.161439e-03 | 0.0029680201 |
| Sellimonas | 3.093446e-03 | 0.0029526525 |
| Klebsiella | 2.695176e-03 | 0.0030438039 |
| Lachnoclostridium_B | 2.584244e-03 | 0.0024238099 |
| Faecivivens | 2.294980e-03 | 0.0022184880 |
| Akkermansia | 1.712760e-03 | 0.0016207176 |
| Anaerostipes | 1.348611e-03 | 0.0015525474 |
| Anaerotruncus | 1.222109e-03 | 0.0013483618 |
| Evtepia | 1.044212e-03 | 0.0011793775 |
| Ruthenibacterium | 9.754233e-04 | 0.0011289357 |
| Gallacutalibacter | 9.580371e-04 | 0.0012464595 |
| Roslinia | 8.184423e-04 | 0.0010738386 |
| Scatavimonas | 6.521948e-04 | 0.0008791553 |
| HGM12998 | 2.844308e-04 | 0.0005424567 |
| Merdimonas | 2.816320e-04 | 0.0008337256 |
| Massilimicrobiota | 2.765415e-04 | 0.0005917204 |
| Romboutsia | 2.042220e-04 | 0.0004908791 |
| UBA4716 | 1.315085e-04 | 0.0003607827 |
| Clostridium_AQ | 1.240170e-04 | 0.0003620509 |
| Harrysmithimonas | 9.739388e-05 | 0.0005322090 |
| Gordonibacter | 6.437947e-05 | 0.0002756017 |
| Acetatifactor | 0.000000e+00 | 0.0000000000 |
| Alangreenwoodia | 0.000000e+00 | 0.0000000000 |
| Alistipes | 0.000000e+00 | 0.0000000000 |
| An181 | 0.000000e+00 | 0.0000000000 |
| Angelakisella | 0.000000e+00 | 0.0000000000 |
| Avimicrobium | 0.000000e+00 | 0.0000000000 |
| Avoscillospira | 0.000000e+00 | 0.0000000000 |
| Bifidobacterium | 0.000000e+00 | 0.0000000000 |
| Blautia_A | 0.000000e+00 | 0.0000000000 |
| Butyricicoccus | 0.000000e+00 | 0.0000000000 |
| CAG-245 | 0.000000e+00 | 0.0000000000 |
| CAG-269 | 0.000000e+00 | 0.0000000000 |
| CAG-273 | 0.000000e+00 | 0.0000000000 |
| CAG-302 | 0.000000e+00 | 0.0000000000 |
| CAG-313 | 0.000000e+00 | 0.0000000000 |
| CAJFPI01 | 0.000000e+00 | 0.0000000000 |
| CAJFUH01 | 0.000000e+00 | 0.0000000000 |
| Caccenecus | 0.000000e+00 | 0.0000000000 |
| Caccomorpha | 0.000000e+00 | 0.0000000000 |
| Caccousia | 0.000000e+00 | 0.0000000000 |
| Catenibacillus | 0.000000e+00 | 0.0000000000 |
| Coprocola | 0.000000e+00 | 0.0000000000 |
| Coproplasma | 0.000000e+00 | 0.0000000000 |
| Egerieicola | 0.000000e+00 | 0.0000000000 |
| Enterococcus | 0.000000e+00 | 0.0000000000 |
| Enterococcus_B | 0.000000e+00 | 0.0000000000 |
| Enterococcus_D | 0.000000e+00 | 0.0000000000 |
| Eubacterium_R | 0.000000e+00 | 0.0000000000 |
| Faecalibacterium | 0.000000e+00 | 0.0000000000 |
| Faeciplasma | 0.000000e+00 | 0.0000000000 |
| Fimicola | 0.000000e+00 | 0.0000000000 |
| Fimimorpha | 0.000000e+00 | 0.0000000000 |
| Fimivicinus | 0.000000e+00 | 0.0000000000 |
| Fournierella | 0.000000e+00 | 0.0000000000 |
| Gallispira | 0.000000e+00 | 0.0000000000 |
| Galloscillospira_A | 0.000000e+00 | 0.0000000000 |
| Gemmiger | 0.000000e+00 | 0.0000000000 |
| Heritagella | 0.000000e+00 | 0.0000000000 |
| Heteroclostridium | 0.000000e+00 | 0.0000000000 |
| Holdemania | 0.000000e+00 | 0.0000000000 |
| Hungatella_B | 0.000000e+00 | 0.0000000000 |
| JAETTH01 | 0.000000e+00 | 0.0000000000 |
| Ligilactobacillus | 0.000000e+00 | 0.0000000000 |
| Limosilactobacillus | 0.000000e+00 | 0.0000000000 |
| Merdibacter | 0.000000e+00 | 0.0000000000 |
| Metalachnospira | 0.000000e+00 | 0.0000000000 |
| Neoanaerotignum_A | 0.000000e+00 | 0.0000000000 |
| Onthovicinus | 0.000000e+00 | 0.0000000000 |
| Ornithomonoglobus_A | 0.000000e+00 | 0.0000000000 |
| Paenibacillus_A | 0.000000e+00 | 0.0000000000 |
| Pararuminococcus | 0.000000e+00 | 0.0000000000 |
| Proteus | 0.000000e+00 | 0.0000000000 |
| Pseudobutyricicoccus | 0.000000e+00 | 0.0000000000 |
| Pullilachnospira | 0.000000e+00 | 0.0000000000 |
| RUG591 | 0.000000e+00 | 0.0000000000 |
| RUG626 | 0.000000e+00 | 0.0000000000 |
| Ruminococcus_G | 0.000000e+00 | 0.0000000000 |
| Salmonella | 0.000000e+00 | 0.0000000000 |
| Sarcina | 0.000000e+00 | 0.0000000000 |
| Scybalenecus | 0.000000e+00 | 0.0000000000 |
| Spyradocola | 0.000000e+00 | 0.0000000000 |
| Timburyella | 0.000000e+00 | 0.0000000000 |
| Tyzzerella | 0.000000e+00 | 0.0000000000 |
| UMGS775 | 0.000000e+00 | 0.0000000000 |
| UMGS856 | 0.000000e+00 | 0.0000000000 |
genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
filter(genus != "g__")%>%
arrange(-mean) %>%
select(genus) %>%
mutate(genus= sub("^g__", "", genus)) %>%
pull()
genus_summary %>%
mutate(genus=factor(genus,levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=colors_alphabetic) +
geom_jitter(alpha=0.3) +
facet_grid(.~cryosection)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")